// GLSL-style aliases so shader-like code can be written in OpenCL C.
// vec2/vec3/vec4 map to the built-in float vector types.
#define vec2 float2
#define vec3 float3
#define vec4 float4
// Swizzle aliases: `.rgb` / `.rgba` expand to `.xyz` / `.xyzw`.
// NOTE: these are plain token replacements, so ANY identifier spelled `rgb`
// or `rgba` (including variable and parameter names) is rewritten as well.
#define rgb xyz
#define rgba xyzw

// Program-scope sampler used for every image read in this file:
// normalized coordinates, clamp-to-edge addressing, linear filtering.
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

// Converts an RGB triple to YUV.
//   rgb     : colour with red/green/blue in .x/.y/.z
//   returns : (Y, U, V) in .x/.y/.z
// The coefficients match the BT.601 analog YUV matrix.
vec3 RGB2YUV(vec3 rgb){
    const float r = rgb.x;
    const float g = rgb.y;
    const float b = rgb.z;

    const float y = r * 0.299f + g * 0.587f + b * 0.114f;
    const float u = -0.14713f * r + (-0.28886f) * g + b * 0.436f;
    const float v = r * 0.615f + g * (-0.51499f) + b * (-0.10001f);

    return (vec3)(y, u, v);
}

// Converts a YUV triple back to RGB.
//   yuv     : (Y, U, V) in .x/.y/.z
//   returns : red/green/blue in .x/.y/.z
vec3 YUV2RGB(vec3 yuv){
    const float y = yuv.x;
    const float u = yuv.y;
    const float v = yuv.z;

    const float r = y + v * 1.13983f;
    const float g = y + u * (-0.394f) + v * (-0.580f);
    // V does not contribute to blue; the zero-weighted term is kept so that
    // a non-finite V still propagates exactly as before.
    const float b = y + u * (2.03211f) + v * (0.0f);

    return (vec3)(r, g, b);
}

// Returns the luma of an RGB colour (same weights as the Y row of RGB2YUV).
//   c : colour with red/green/blue in .x/.y/.z
float extractLuma(vec3 c)
{
    const float weightR = 0.299f;
    const float weightG = 0.587f;
    const float weightB = 0.114f;

    return c.x * weightR + c.y * weightG + c.z * weightB;
}

// Luma sharpening filter.
//
// For each work-item, samples the source image inside the region of interest,
// runs a 3x3 filter (centre weight +9, the eight neighbours -1) over the luma
// of the neighbourhood, adds the scaled filter response to the pixel's Y
// channel, converts back to RGB and blends the result with the original.
//
//   input     : source image, sampled with normalized coordinates.
//   dest_data : destination image; one texel is written per work-item at
//               (get_global_id(0), get_global_id(1)).
//   param     : filter parameters. Only origROI[0..3] is read here, as
//               {left, top, width, height} of the source region
//               (presumably in normalized units, since the result is used
//               directly as a normalized sampling coordinate - confirm
//               against the host code).
//   alpha     : blend amount in percent; 100 yields the fully sharpened
//               colour, 0 yields the unmodified source colour.
//   intensity : sharpening strength; the filter response is scaled by
//               intensity / 40 before being added to the luma.
__kernel void MAIN(
      __read_only image2d_t input,
      __write_only image2d_t dest_data,
      __global FilterParam* param,
      int alpha,
      int intensity)
{
    // The output resolution is taken from the NDRange size.
    const int W = get_global_size(0);
    const int H = get_global_size(1);
    const float2 outputResolution = (float2)(W, H);

    // Destination texel for this work-item.
    const int2 outCoord = (int2)(get_global_id(0), get_global_id(1));

    // Source position for this work-item. get_global_id0/1 are helpers
    // defined elsewhere in the project that derive the id from `param`.
    const vec2 fragCoord = (vec2)(get_global_id0( param), get_global_id1( param));

    // Pixel centre in [0,1], then remapped into the region of interest.
    vec2 tc = (fragCoord + (vec2)(0.5f)) / outputResolution;
    const vec2 roiOrigin = (vec2)(param->origROI[0], param->origROI[1]);
    const vec2 roiSize   = (vec2)(param->origROI[2], param->origROI[3]);
    tc = tc * roiSize + roiOrigin;

    const float4 orig = read_imagef(input, sampler, tc);
    const vec3 yuv = RGB2YUV((vec3)(orig.xyz));

    // Tap spacing is one output pixel at a width of 640 and grows
    // proportionally with the output width.
    const float factor = outputResolution.x / 640.0f;

    // 3x3 filter on luma: +9 for the centre tap, -1 for each neighbour.
    float accumY = 0.0f;
    for (int i = -1; i <= 1; ++i) {
        for (int j = -1; j <= 1; ++j) {
            const vec2 offset = ((vec2)(i, j) / outputResolution) * factor;
            const float s = extractLuma(read_imagef(input, sampler, tc + offset).xyz);
            const float weight = (i == 0 && j == 0) ? 9.0f : -1.0f;
            accumY += s * weight;
        }
    }
    accumY /= 9.0f;

    // Add the scaled filter response to the original luma; chroma is untouched.
    const float gain = (float)(intensity) / 40.0f;
    const float sharpY = yuv.x + accumY * gain;
    const vec3 retRGB = YUV2RGB((vec3)(sharpY, yuv.y, yuv.z));

    // Keep the source alpha channel, then blend sharpened and original colour.
    const vec4 sharpened = (vec4)(retRGB, orig.w);
    const vec4 retColor = mix(sharpened, orig, 1.0f - (float)(alpha) / 100.0f);

    write_imagef(dest_data, outCoord, retColor);
}

